import json
import openai
from tqdm import tqdm
import pandas as pd
import argparse
import os
import sys
from typing import Dict, List, Optional
from collections import Counter
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import re  # Added for parsing persona responses

def parse_args() -> argparse.Namespace:
    """Build the command-line interface and return the parsed options.

    The options select which slice of the input CSV this process handles
    (so several runs can work in parallel), where the JSON results are
    written, and which CSV file to read.

    Returns:
        The populated ``argparse.Namespace`` with ``start``, ``end``,
        ``output_dir`` and ``csv_path`` attributes.
    """
    # One (flag, keyword-arguments) pair per option, registered in order.
    option_specs = (
        ("--start", {"type": int, "default": 0, "help": "Start index (inclusive) of the slice."}),
        ("--end", {"type": int, "default": None, "help": "End index (inclusive) of the slice."}),
        ("--output_dir", {"type": str, "default": "emotion_results", "help": "Directory to write JSON results."}),
        ("--csv_path", {"type": str, "required": True, "help": "Path to the input CSV with columns video_id,story"}),
    )

    cli = argparse.ArgumentParser(
        description="Run persona-based emotion classification over a slice of the dataset.",
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()

# ---------------------------------------------------------------------------
# Persona system prompts, keyed by "<age-bracket>_<gender>".
#
# Each value is sent verbatim as the system message for one persona. Every
# prompt asks the model to pick a single key from the emotion vocabulary and
# to reply in exactly two lines: "Reason: ..." followed by "Answer: ...".
# The prompt wording is part of the program's behavior; edit with care.
# ---------------------------------------------------------------------------
persona_prompts = {
    "18-24_female": """You are a woman aged 18–24 who intuitively understands what resonates with your generation—bold aesthetics, authenticity, humor, pop culture references, and individuality.

You will be given an *emotion_vocab* dictionary that lists emotions (e.g. cheerful, angry, calm) with short definitions. You are then shown the **story** of a video advertisement.

Your task is to choose the SINGLE most relevant emotion key from *emotion_vocab* that most accurately captures how viewers are likely to feel while watching this advertisement.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "18-24_male": """You are a man aged 18–24 who knows what grabs young men's attention—humor, edge, cultural references, and visual flair.

You will be given an *emotion_vocab* dictionary that lists emotions (e.g. cheerful, angry, calm) with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that best represents how viewers are likely to feel.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "25-34_female": """You are a woman aged 25–34 who connects with content that is visually refined, emotionally resonant, and aligned with lifestyle interests—career, wellness, and relationships.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to choose the SINGLE most relevant emotion key from *emotion_vocab* that best represents how the advertisement makes viewers feel.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "25-34_male": """You are a man aged 25–34 who appreciates content that shows ambition, clarity, innovation, fitness, and smart humor.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to choose the SINGLE most relevant emotion key from *emotion_vocab* that best captures viewer sentiment.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "35-44_female": """You are a woman aged 35–44 who is drawn to emotionally intelligent storytelling, depth, and purpose.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that best reflects the emotional resonance of the story.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "35-44_male": """You are a man aged 35–44 who connects with grounded, aspirational content about family, success, and purpose.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that best captures viewer sentiment.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "45-54_female": """You are a woman aged 45–54 who appreciates visuals and stories that carry meaning, clarity, and purpose.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to choose the SINGLE most relevant emotion key from *emotion_vocab* that best matches viewer feelings.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "45-54_male": """You are a man aged 45–54 who values storytelling that emphasizes responsibility, growth, trust, and wisdom.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that aligns best with audience sentiment.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "55+_female": """You are a woman aged 55 or older who resonates with content that conveys warmth, legacy, and deep emotional meaning.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that best describes the emotional takeaway.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",

    "55+_male": """You are a man aged 55 or older who prefers storytelling with sincerity, meaning, and timeless values.

You will be given an *emotion_vocab* dictionary that lists emotions with short definitions. You are then shown the **story** of a video advertisement.

Your task is to select the SINGLE most relevant emotion key from *emotion_vocab* that best captures viewer sentiment.

Return exactly two lines:
Reason: <brief justification in one sentence>
Answer: <emotion_key>
Only output the emotion key after "Answer:".""",
}


# Emotion vocabulary, kept as one pre-formatted string ("Emotion_vocab = {...}")
# because it is placed verbatim at the start of every user message, ahead of
# the story. Each key maps to "key(synonym, synonym, ...)"; the persona prompts
# ask the model to answer with exactly one of these keys.
# NOTE: despite its name, `topics` holds emotions, not topics.
topics = "Emotion_vocab = {'active': 'active(energetic, adventurous, vibrant, enthusiastic, playful)', 'afraid': 'afraid(horrified, scared, fearful)', 'alarmed': 'alarmed(concerned, worried, anxious, overwhelmed)', 'alert': 'alert(attentive, curious)', 'amazed': 'amazed(surprised, astonished, awed, fascinated, intrigued)', 'amused': 'amused(humored, laughing)', 'angry': 'angry(annoyed, irritated)', 'calm': 'calm(soothed, peaceful, comforted, fullfilled, cozy)', 'cheerful': 'cheerful(delighted, happy, joyful, carefree, optimistic)', 'confident': 'confident(assured, strong, healthy)', 'conscious': 'conscious(aware, thoughtful, prepared)', 'creative': 'creative(inventive, productive)', 'disturbed': 'disturbed(disgusted, shocked)', 'eager': 'eager(hungry, thirsty, passionate)', 'educated': 'educated(informed, enlightened, smart, savvy, intelligent)', 'emotional': 'emotional(vulnerable, moved, nostalgic, reminiscent)', 'empathetic': 'empathetic(sympathetic, supportive, understanding, receptive)', 'fashionable': 'fashionable(trendy, elegant, beautiful, attractive, sexy)', 'feminine': 'feminine(womanly, girlish)', 'grateful': 'grateful(thankful)', 'inspired': 'inspired(motivated, ambitious, empowered, determined)', 'jealous': 'jealous', 'loving': 'loving(loved, romantic)', 'manly': 'manly', 'persuaded': 'persuaded(impressed, enchanted, immersed)', 'pessimistic': 'pessimistic(skeptical)', 'proud': 'proud(patriotic)', 'sad': 'sad(depressed, upset, betrayed, distant)', 'thrifty': 'thrifty(frugal)', 'youthful': 'youthful(childlike)'}"

# Patterns for the two-line "Reason: ... / Answer: ..." reply format that the
# persona prompts request. Compiled once because they are applied to every
# generated response.
_REASON_RE = re.compile(r"(?i)^reason:\s*(.+)$", re.MULTILINE)
_ANSWER_RE = re.compile(r"(?i)^answer:\s*([^\s\.,;\n]+)", re.MULTILINE)

# Characters trimmed from both ends of an extracted emotion key.
_KEY_STRIP_CHARS = "'\". ,"

# Topic value that marks a persona whose prediction is unusable; such entries
# are excluded from the majority vote.
_ERROR_TOPIC = "error"


def _as_text(value) -> str:
    """Return *value* as a string, mapping a missing CSV cell (None/NaN) to ""."""
    # pandas represents empty cells as float NaN; str() would turn it into the
    # literal text "nan". NaN is the only float that is not equal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def _generate_response(model, tokenizer, messages) -> str:
    """Run one sampled chat completion and return only the newly generated text."""
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        enable_thinking=False,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=300,
            temperature=0.85,
            do_sample=True,
            min_p=0.1,
        )

    # The output sequence starts with the prompt tokens; decode only what
    # follows them.
    return tokenizer.decode(
        outputs[0][len(input_ids[0]):], skip_special_tokens=True
    ).strip()


def _parse_persona_response(raw_resp: str) -> Dict[str, str]:
    """Extract the emotion key and the justification from a model reply.

    The key comes from the first ``Answer:`` line (case-insensitive). If there
    is no such line, the last whitespace-separated word of the reply is used.
    When no key can be extracted at all (empty reply, or only punctuation),
    the topic is set to ``"error"`` so the entry is ignored by the vote.
    """
    reason_match = _REASON_RE.search(raw_resp)
    reason_text = reason_match.group(1).strip() if reason_match else ""

    answer_match = _ANSWER_RE.search(raw_resp)
    if answer_match:
        candidate = answer_match.group(1)
    else:
        words = raw_resp.split()
        candidate = words[-1] if words else ""

    pred_topic = candidate.strip().lower().strip(_KEY_STRIP_CHARS)

    return {
        'topic': pred_topic or _ERROR_TOPIC,
        'reason': reason_text,
        'raw_response': raw_resp,
    }


def _majority_topic(persona_predictions: Dict[str, Dict[str, str]]) -> str:
    """Return the most frequent non-error topic across all personas."""
    if not persona_predictions:
        return "error_no_predictions"

    valid_preds = [
        pred['topic']
        for pred in persona_predictions.values()
        if pred['topic'] != _ERROR_TOPIC
    ]
    if not valid_preds:
        return "error_no_valid_predictions"

    # On a tie, Counter.most_common keeps the topic that was seen first.
    return Counter(valid_preds).most_common(1)[0][0]


def _save_results(results: List[dict], output_path: str) -> None:
    """Write *results* as JSON, replacing the previous file in one step."""
    # Write to a sibling temp file and rename it over the target so that an
    # interruption mid-write cannot truncate the results saved so far.
    tmp_path = f"{output_path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)
    os.replace(tmp_path, output_path)


def main():
    """Classify the emotion of each story in a CSV slice with every persona.

    Steps:
      1. Parse the CLI options and read the CSV (exits with status 1 if the
         file cannot be read).
      2. Load the Qwen3-32B model and tokenizer into the module-level
         ``model`` / ``tokenizer`` globals.
      3. For each record in the requested slice, ask every persona prompt for
         one emotion key, then take a majority vote over the usable answers.
      4. After each record, rewrite
         ``<output_dir>/emotion_results_<start>_<end>.json`` with everything
         collected so far.

    A failure for a single persona is recorded as an entry whose ``topic`` is
    ``"error"`` and is left out of the vote; a failure for a whole record is
    printed and the record is skipped.
    """
    global model, tokenizer

    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    # Read the CSV before loading the model: a bad path should fail
    # immediately rather than after the expensive model load.
    try:
        df = pd.read_csv(args.csv_path)
    except Exception as e:
        print(f"Error reading CSV {args.csv_path}: {e}")
        sys.exit(1)

    all_records = df.to_dict(orient='records')

    # Determine slice for this run (both bounds inclusive; the end is clamped
    # to the last available record).
    start_idx = args.start
    end_idx = len(all_records) - 1 if args.end is None else min(args.end, len(all_records) - 1)
    slice_records = all_records[start_idx : end_idx + 1]

    print(f"Processing slice {start_idx}–{end_idx} (n={len(slice_records)})")

    # Load Qwen3-32B model for local inference.
    # NOTE(review): recent transformers releases warn that passing
    # load_in_4bit directly is deprecated in favor of
    # quantization_config=BitsAndBytesConfig(load_in_4bit=True) — confirm
    # against the installed version before upgrading.
    model_name = "Qwen/Qwen3-32B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
        load_in_4bit=True,
    )

    results = []
    output_path = os.path.join(args.output_dir, f"emotion_results_{start_idx}_{end_idx}.json")

    for rec in tqdm(slice_records, desc=f"Persona-Emotion Eval {start_idx}-{end_idx}"):
        # Resolved outside the try block so the error message below can
        # always refer to it.
        video_id = _as_text(rec.get('video_id', '')).strip()
        try:
            # split()/join collapses every whitespace run (including newlines
            # and form feeds) into a single space.
            cleaned_text = ' '.join(_as_text(rec.get('story', '')).split())

            persona_predictions = {}
            for persona_name, sys_prompt in persona_prompts.items():
                messages = [
                    {"role": "system", "content": sys_prompt},
                    {
                        "role": "user",
                        "content": f"{topics}\n\nStory: {cleaned_text}"
                    }
                ]

                try:
                    raw_resp = _generate_response(model, tokenizer, messages)
                    persona_predictions[persona_name] = _parse_persona_response(raw_resp)
                except Exception as e:
                    # Best effort: one failing persona must not discard the
                    # other personas' answers for this record.
                    print(f"Error during Qwen inference for key {video_id}, persona {persona_name}: {e}")
                    persona_predictions[persona_name] = {
                        'topic': _ERROR_TOPIC,
                        'reason': "",
                        'raw_response': "",
                        'error': str(e),
                    }

            # Majority vote for the final topic
            final_topic = _majority_topic(persona_predictions)

            # Store results
            results.append({
                'video_id': video_id,
                'url': f"https://www.youtube.com/watch?v={video_id}" if video_id else "",
                'story': cleaned_text,
                'persona_predictions': persona_predictions,
                'final_topic': final_topic
            })

            # Incremental save so a crash loses at most the current record.
            _save_results(results, output_path)

        except Exception as e:
            print(f"Error processing key {video_id}: {e}")
            continue

    print(f"Finished processing. Results saved to {output_path}")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()




